In [1]:
# Change the working directory to the parent folder.
# NOTE(review): presumably the notebook lives in a subdirectory and the relative
# `data/...` paths used in later cells are meant to resolve from the parent -- confirm.
%cd ..
In [2]:
import pandas as pd
import plotly.plotly as py
from plotly.graph_objs import *
from mykeys import tw
import networkx as nx
import itertools
In [51]:
# NOTE(review): get_party_representation is defined in a later cell of this
# notebook; on a fresh kernel that defining cell has to be executed first.
get_party_representation()
Out[51]:
In [66]:
# NOTE(review): get_party_activity is defined in a later cell of this
# notebook; on a fresh kernel that defining cell has to be executed first.
get_party_activity()
Out[66]:
In [67]:
# NOTE(review): plot_MP_activity is defined in a later cell of this
# notebook; on a fresh kernel that defining cell has to be executed first.
plot_MP_activity()
Out[67]:
In [68]:
# Keep only the accounts whose `profs` text mentions one of the party labels.
df = pd.read_csv('data/TR-tweeps-En.csv',encoding='utf-8')
x = ['AKP', 'CHP', 'MHP', 'HDP','Independent']
# na=False: a missing `profs` value counts as "no match" instead of producing a
# NaN in the mask, which boolean indexing would reject.
df = df[df.profs.str.contains('|'.join(x), na=False)]
df
Out[68]:
In [69]:
# Load the tweets and group the commenting handles by news item (title, dt).
tweets = pd.read_csv('data/TR-tweets.csv', parse_dates=['dt'], encoding='utf-8')
# Drop the first character of every handle.
# NOTE(review): presumably a leading '@' -- confirm against the CSV.
tweets['twhandle'] = tweets['twhandle'].str[1:]
title = tweets.groupby(['title', 'dt'])['twhandle']
In [70]:
# get monitored screen_names for each party
df = pd.read_csv('data/TR-tweeps-En.csv',encoding='utf-8')
groups = ['AKP', 'CHP', 'MHP', 'HDP','Independent']
group = {}   # party label -> set of twhandle values whose `profs` mention the label
mps = set()  # union of all the per-party sets
for p in groups:
    # na=False: a missing `profs` value counts as "no match" instead of producing
    # a NaN in the mask, which boolean indexing would reject.
    group[p] = set(df[df.profs.str.contains(p, na=False)].twhandle.tolist())
    mps.update(group[p])
In [73]:
# Per-party counts for each (title, dt) group, keyed by party label.
# Starts empty; filled by the loop over `groups` further down in this cell.
news = {}
def get_twbyp(g,partililer):
    """Count the entries of group ``g`` that belong to ``partililer``.

    ``g`` must expose ``.values.tolist()`` (e.g. a pandas Series of handles);
    ``partililer`` is any container supporting ``in``. Repeated handles are
    counted once per occurrence.
    """
    return sum(1 for handle in g.values.tolist() if handle in partililer)
# For every party, count per (title, dt) group how many commenting handles
# belong to that party (one count per tweet row, not per distinct MP).
for p in groups:
    news[p] = title.apply(get_twbyp, group[p])
    # Alternative that counts distinct MPs per title instead:
    # news[p] = title.apply(lambda g: len(group[p] & set(g.values)))
In [74]:
# Add the per-party counts up into one total per (title, dt), reporting each
# party's totals along the way.
polnews = pd.Series(data=0, index=news['Independent'].index)
for p in groups:
    party_counts = news[p]
    commented = party_counts[party_counts > 0]
    print(party_counts.sum(), 'comments made on', commented.size, 'news by', p)
    polnews = polnews + party_counts
commented_by_any = polnews[polnews > 0]
print(polnews.sum(), 'comments made on', commented_by_any.size, 'news by TBMM (any MP)')
In [10]:
# diff[party] = comments by the party minus comments by all the other parties
#             = news[party] - (polnews - news[party])
diff = {party: 2 * news[party] - polnews for party in groups}
In [11]:
# The 60 events most commented on by all MPs.
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
polnews.sort_values(ascending=False).head(60)
Out[11]:
In [12]:
# Number of comments on each event by AKP MPs minus those by all other MPs.
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
diff['AKP'].sort_values(ascending=False)
Out[12]:
In [13]:
# Number of comments on each event by CHP MPs minus those by all other MPs.
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
diff['CHP'].sort_values(ascending=False)
Out[13]:
In [14]:
# Number of comments on each event by MHP MPs minus those by all other MPs.
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
diff['MHP'].sort_values(ascending=False)
Out[14]:
In [15]:
# Number of comments on each event by HDP MPs minus those by all other MPs.
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
diff['HDP'].sort_values(ascending=False)
Out[15]:
In [16]:
# Number of comments on each event by Independent MPs minus those by all other MPs.
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
diff['Independent'].sort_values(ascending=False)
Out[16]:
In [17]:
# Undirected graph with one vertex per monitored handle; each vertex carries
# its party label in the `group` attribute.
G = nx.Graph()
party_of_handle = ((handle, party) for party in groups for handle in group[party])
for handle, party in party_of_handle:
    G.add_node(handle, group=party)
In [18]:
def updateG(group,G,politicians):
    """Create or strengthen weighted edges among the politicians in ``group``.

    Parameters
    ----------
    group : object with ``.tolist()``
        Handles that commented on one news item (e.g. a pandas Series).
    G : graph
        Mutated in place; must provide ``has_edge``, ``add_edge`` and
        ``G[u][v]`` edge-attribute access.
    politicians : container
        Handles that are allowed to become edge endpoints.

    Every unordered pair of positions in ``group`` whose handles are both in
    ``politicians`` adds 1 to the ``weight`` of the edge between them (the edge
    is created with ``weight=1`` on first sight). A handle that occurs more than
    once in ``group`` is never paired with itself, so no self-loops are created.
    Returns ``None``.
    """
    # Filter first: pairs are then only generated among eligible handles, in the
    # same relative order as pairing everything and discarding afterwards.
    members = [handle for handle in group.tolist() if handle in politicians]
    for v1, v2 in itertools.combinations(members, 2):
        if v1 == v2:
            # Same handle listed twice in this group; a self-tie carries no
            # co-commentation information.
            continue
        if G.has_edge(v1, v2):
            G[v1][v2]['weight'] += 1
        else:
            G.add_edge(v1, v2, weight=1)
In [19]:
# Add edges: walk every (title, dt) group and let updateG record its ties in G.
# An explicit loop is used because updateG is called only for its side effect
# on G; no per-group result is needed.
for key, handles in title:
    updateG(handles, G, mps)
# Report the size of the graph as (number of nodes, number of edges).
print(G.number_of_nodes(), G.number_of_edges())
In [22]:
# Export the graph in GML format so it can be opened in Gephi for better visualization.
nx.write_gml(G,"data/nediyor_politics.gml")
See the interactive output here: TBMM Co-commentation Network Visualization
In [28]:
# Calculate the likelihood of tie formation among parties,
# e.g. AKP-AKP, AKP-CHP, AKP-MHP, AKP-HDP, AKP-Independent.
# The graph is re-read from the GML file that an earlier cell writes, replacing
# the in-memory G.
G = nx.read_gml("data/nediyor_politics.gml")
In [69]:
def get_likelihood(g1,g2):
    """Return the tie formation likelihood of group ``g1`` towards group ``g2``.

    Works on the module-level graph ``G``. For every node whose ``group``
    attribute equals ``g1``, the share of its total incident edge ``weight``
    that goes to neighbours whose ``group`` equals ``g2`` is computed; nodes
    without any weighted tie are skipped. The result is the plain mean of
    those shares. It is also printed as ``"<g1>-<g2> likelihood <value>"``.

    Returns ``float('nan')`` when no ``g1`` node has any tie (including when
    ``g1`` has no nodes at all), since the mean is undefined in that case.
    """
    # `G.node` was removed in NetworkX 2.4, while on 1.x `G.nodes` is a method
    # and only `G.node` is subscriptable; use whichever attribute mapping exists.
    node_attrs = getattr(G, 'node', None)
    if node_attrs is None:
        node_attrs = G.nodes

    likelihoods = []
    for politician in G:
        if node_attrs[politician]['group'] != g1:
            continue
        g2weight = 0.0  # sum of edge weights to neighbours whose party is g2
        total = 0.0     # sum of all edge weights (all neighbours)
        for nbor, edge in G[politician].items():
            weight = edge['weight']
            if node_attrs[nbor]['group'] == g2:
                g2weight += weight
            total += weight
        if total > 0:
            likelihoods.append(g2weight / total)

    if likelihoods:
        likelihood = sum(likelihoods) / len(likelihoods)
    else:
        # No g1 node with ties: avoid dividing by zero.
        likelihood = float('nan')
    print(g1+'-'+g2+' likelihood {:.2f}'.format(likelihood))
    return likelihood
In [72]:
# Print the likelihood for every party pair once: each party is paired with
# itself and with the parties listed after it.
groups = ['AKP', 'CHP', 'MHP', 'HDP','Independent']
for position, g1 in enumerate(groups):
    for g2 in groups[position:]:
        get_likelihood(g1, g2)
In [ ]:
# TODO: community detection.
# NOTE(review): this cell was left unfinished -- it held a bare `import` with no
# module name, which is a SyntaxError. Name the module before re-enabling it.
# import
In [65]:
def get_party_representation():
    """ Plot Tweep Rate by (i) MP counts, (ii) Monitored MP counts

    Reads ``data/TR-tweeps-En.csv`` and, for each of the five party labels,
    collects the distinct ``twhandle`` values whose ``profs`` text contains the
    label. Two grouped bars are drawn per party: the monitored handles as a
    rounded percentage of the party's chair count, and the plain number of
    monitored handles.

    Returns whatever ``py.iplot`` returns for the figure.
    """
    # Chair counts per party, from
    # http://www.tbmm.gov.tr/develop/owa/milletvekillerimiz_sd.dagilim
    chairs = {'AKP':312, 'CHP':125, 'HDP':27, 'MHP':52, 'Independent':13}
    df = pd.read_csv('data/TR-tweeps-En.csv',encoding='utf-8')
    groups = ['AKP', 'CHP', 'MHP', 'HDP','Independent']
    group = {}
    for p in groups:
        # na=False: a missing `profs` value counts as "no match" instead of
        # producing a NaN in the mask, which boolean indexing would reject.
        group[p] = set(df[df.profs.str.contains(p, na=False)].twhandle.tolist())
    # Percentage of chairs that are monitored, rounded half up to an integer.
    y1 = [int((len(group[p])*1.0/chairs[p])*100+0.5) for p in groups]
    y2 = [len(group[p]) for p in groups]
    trace1 = Bar(x=groups, y=y1, name='Percentage of MPs normalized by # of chairs')
    trace2 = Bar(x=groups, y=y2, name='Plain number of MPs monitored')
    data = Data([trace1, trace2])
    layout = Layout(autosize=True,
                    title='Members of TBMM monitored',
                    barmode='group',
                    legend=Legend(xanchor='right',yanchor='top', y=1,traceorder='normal'),
                    yaxis=YAxis(title='MPs monitored by nediyor.com'),
                    )
    fig = Figure(data=data, layout=layout)
    return py.iplot(fig,filename='MPs monitored by nediyor.com')
def get_party_activity():
    """ Plot Twitter Usage normalized by (i) MP counts, (ii) Monitored MP counts

    Reads ``data/TR-tweeps-En.csv`` and ``data/TR-tweets.csv``, counts the tweet
    rows of every handle whose ``profs`` text mentions one of the five party
    labels, and draws two grouped bars per party: the party's tweet total
    divided by its MP count, and divided by the number of its handles that
    actually appear in the tweets. Both values are rounded half up to two
    decimals. A party without any such handle gets 0.0 for the second bar.

    Returns whatever ``py.iplot`` returns for the figure.
    """
    x = ['AKP', 'CHP', 'MHP', 'HDP','Independent']
    # MP counts per party, from
    # http://www.tbmm.gov.tr/develop/owa/milletvekillerimiz_sd.dagilim
    parties = {'AKP':312, 'CHP':125, 'HDP':27, 'MHP':52, 'Independent':13}

    df = pd.read_csv('data/TR-tweeps-En.csv',encoding='utf-8')
    # na=False: a missing `profs` value counts as "no match" instead of producing
    # a NaN in the mask, which boolean indexing would reject.
    df = df[df.profs.str.contains('|'.join(x), na=False)]
    # handle -> first label (in the order of `x`) found in that handle's `profs`
    mparty = dict(zip(df.twhandle, df.profs.map(lambda mp: [p for p in x if p in mp][0])))

    tweets = pd.read_csv('data/TR-tweets.csv',encoding='utf-8',parse_dates=['dt'])
    # Drop the first character of every handle.
    # NOTE(review): presumably a leading '@' -- confirm against the CSV.
    tweets['twhandle'] = tweets['twhandle'].str[1:]
    tweets = tweets.rename(columns={'twtext':'twcnt'})
    # `Series.order` no longer exists in pandas; `sort_values` is its replacement.
    tweeps = tweets.groupby(by='twhandle')['twcnt'].count().sort_values(ascending=False)
    poltweeps = tweeps[tweeps.index.isin(df.twhandle)]

    vekil = pd.DataFrame.from_dict(mparty,orient='index')
    vekil.index.name = 'twhandle'
    vekil.columns = ['party']
    # One row per tweeting handle: its tweet count (`twcnt`) and its `party`.
    df = pd.DataFrame(poltweeps).join(vekil)

    perMP = {}      # tweet total / MP count of the party
    perTweep = {}   # tweet total / number of the party's handles present in df
    for p, cnt in parties.items():
        members = df[df.party == p]
        total = members.twcnt.sum()
        perMP[p] = total / float(cnt)
        # Guard the empty party: 0/0 would give NaN, which int() below rejects.
        perTweep[p] = total / float(len(members)) if len(members) else 0.0

    y1 = [int((perMP[p] * 100) + 0.5) / 100.0 for p in x]
    y2 = [int((perTweep[p] * 100) + 0.5) / 100.0 for p in x]

    trace1 = Bar(x=x, y=y1, name='normalized by MP counts')
    trace2 = Bar(x=x, y=y2, name='normalized by curated tweep counts')
    data = Data([trace1, trace2])
    layout = Layout(autosize=True,
                    title="Newsworthy Tweet Counts",
                    barmode='group',
                    legend=Legend(x=0, y=1,traceorder='normal'),
                    yaxis=YAxis(title='Number of newsworthy tweets per MP'),
                    annotations=Annotations([
                        Annotation(
                            text=('Code and datasets are available on <a href="https://github.com/oztalha/News-Commentary-Tweets-of-Elites">github.com/oztalha</a><br>'
                                  'Two years of curated tweets are scraped from <a href="http://nediyor.com/">nediyor.com</a>'),
                            showarrow=False,
                            x=0.01,
                            y=0.85,
                            xref='paper',
                            yref='paper',
                            xanchor='left',
                            yanchor='top',
                            align='left'
                        ),
                    ]))
    fig = Figure(data=data, layout=layout)
    return py.iplot(fig, filename='Newsworthy Tweet Counts of Turkish Parties')
def plot_MP_activity():
    """Plot the number of tweets per monitored politician, coloured by party.

    Reads ``data/TR-tweeps-En.csv`` and ``data/TR-tweets.csv``, keeps the
    handles whose ``profs`` text mentions one of the five party labels, counts
    their tweet rows and draws one bar per handle in descending order of that
    count. Each bar takes the colour of the first label (in list order) found
    in the handle's ``profs``.

    Returns whatever ``py.iplot`` returns for the figure.
    """
    party_labels = ['AKP', 'CHP', 'MHP', 'HDP','Independent']
    df = pd.read_csv('data/TR-tweeps-En.csv',encoding='utf-8')
    # na=False: a missing `profs` value counts as "no match" instead of producing
    # a NaN in the mask, which boolean indexing would reject.
    df = df[df.profs.str.contains('|'.join(party_labels), na=False)]
    mparty = dict(zip(df.twhandle,
                      df.profs.map(lambda mp: [p for p in party_labels if p in mp][0])))

    tweets = pd.read_csv('data/TR-tweets.csv',encoding='utf-8',parse_dates=['dt'])
    # Drop the first character of every handle.
    # NOTE(review): presumably a leading '@' -- confirm against the CSV.
    tweets['twhandle'] = tweets['twhandle'].str[1:]
    # `Series.order` no longer exists in pandas; `sort_values` is its replacement.
    tweeps = tweets.groupby(by='twhandle')['twtext'].count().sort_values(ascending=False)
    poltweeps = tweeps[tweeps.index.isin(df.twhandle)]

    partycolors = {'AKP':'orange', 'CHP':'red', 'MHP':'green', 'HDP':'purple','Independent':'cyan'}
    # A dedicated loop name keeps the party list from being shadowed.
    colors = [partycolors[mparty[handle]] for handle in poltweeps.index]
    data = Data([Bar(
        x=poltweeps.index,
        y=poltweeps.values,
        marker=Marker(color=colors)
    )])
    layout = Layout(autosize=True, margin=Margin(b=111),
                    yaxis=YAxis(title='# of news commentated (Jan 2013 - Jan 2015)'),
                    title="News commentated by Members of TBMM (curated by nediyor.com)")
    fig = Figure(data=data, layout=layout)
    return py.iplot(fig,filename="TBMM as News Commentators")
In [13]:
# Tweet counts per handle, restricted to the handles listed in `df.twhandle`.
tweets = pd.read_csv('data/TR-tweets.csv',encoding='utf-8',parse_dates=['dt'])
# Drop the first character of every handle.
# NOTE(review): presumably a leading '@' -- confirm against the CSV.
tweets['twhandle']=tweets['twhandle'].str[1:]
# `Series.order` no longer exists in pandas; `sort_values` is its replacement.
tweeps = tweets.groupby(by='twhandle')['twtext'].count().sort_values(ascending=False)
poltweeps = tweeps[tweeps.index.isin(df.twhandle)]
In [77]:
# Print every handle that `mparty` maps to 'Independent'.
# NOTE(review): in the visible notebook `mparty` is only assigned inside
# functions; confirm where the top-level name comes from.
independent_handles = (handle for handle, party in mparty.items() if party == 'Independent')
for handle in independent_handles:
    print(handle)
In [49]:
# Per party: label, summed `twcnt`, and number of rows in `df` for that party.
# NOTE(review): in the visible notebook `parties` and a `df` with `party`/`twcnt`
# columns only exist inside get_party_activity; confirm where these come from.
for p in parties:
    party_rows = df[df.party == p]
    print(p, party_rows.twcnt.sum(), party_rows.shape[0])